set more off

*** nashville ***

use "dataset_nashville wave 1", clear
append using "dataset_census nashville"

* first get median age
preserve
	#delimit;
	local agecats
		22to24
		25to29
		30to34
		35to39
		40to44
		45to49
		50to54
		55to59
		60to61
		62to64
		65to66
		67to69
		70to74
		75to79
		80to84
		85to90
		;
	#delimit cr
	drop population populationfemale populationwhite populationcollege
	collapse (sum) population*
	xpose, clear varname
	gen index = _n
	su v1
	gen percent = v1/`r(sum)'
	gen cumpct = 0
	replace cumpct = percent if index == 1
	replace cumpct = cumpct[_n-1] + percent[_n] if index > 1
	gen agerange = subinstr(_varname, "population_", "", 10)
	drop _varname
	split agerange, parse("to")
	destring agerange1 agerange2, replace
	su index if cumpct > .5
	gen medianage = agerange2 if index == r(min)
	su medianage
	l
restore
gen age_census = `r(mean)'

* now get other stats
egen age_survey = median(age)
egen female_survey = mean(female)
egen white_survey = mean(white)
egen college_survey = mean(college)
egen homeowner_survey = mean(homeowner)

collapse age_survey age_census female_survey white_survey college_survey homeowner_survey population populationfemale populationwhite populationcollege housingunits housingunitsowneroccupied

gen female_census = populationfemale/ population
gen white_census = populationwhite/ population
gen college_census = populationcollege/ population
gen homeowner_census = housingunitsowneroccupied / housingunits
format pop* %200.0fc

local myvars age_census age_survey female_census female_survey white_c white_s college_c college_s homeowner_c homeowner_s
keep `myvars'
order `myvars'

gen index = _n
reshape i index
reshape j mode _census _survey
reshape xij age female white college homeowner
reshape long

xpose, clear varname
drop if inlist(_varname, "index", "mode")
order _varname
rename v1 Census
rename v2 Survey
list
gen sample = "Nashville"
save tmp, replace

*** memphis ***

use "dataset_memphis wave 1", clear
append using "dataset_census memphis"

* first get median age
preserve
	#delimit;
	local agecats
		18to19
		20to20
		21to21
		22to24
		25to29
		30to34
		35to39
		40to44
		45to49
		50to54
		55to59
		60to61
		62to64
		65to66
		67to69
		70to74
		75to79
		80to84
		85to90
		;
	#delimit cr
	drop population populationfemale populationwhite populationcollege
	collapse (sum) population*
	xpose, clear varname
	gen index = _n
	su v1
	gen percent = v1/`r(sum)'
	gen cumpct = 0
	replace cumpct = percent if index == 1
	replace cumpct = cumpct[_n-1] + percent[_n] if index > 1
	gen agerange = subinstr(_varname, "population_", "", 10)
	drop _varname
	split agerange, parse("to")
	destring agerange1 agerange2, replace
	su index if cumpct > .5
	gen medianage = agerange2 if index == r(min)
	su medianage
	l
restore
gen age_census = `r(mean)'

* now get other stats
egen age_survey = median(age)
egen female_survey = mean(female)
egen white_survey = mean(white)
egen college_survey = mean(college)
egen homeowner_survey = mean(homeowner)

collapse age_survey age_census female_survey white_survey college_survey homeowner_survey population populationfemale populationwhite populationcollege housingunits housingunitsowneroccupied

gen female_census = populationfemale/ population
gen white_census = populationwhite/ population
gen college_census = populationcollege/ population
gen homeowner_census = housingunitsowneroccupied / housingunits
format pop* %200.0fc

local myvars age_census age_survey female_census female_survey white_c white_s college_c college_s homeowner_c homeowner_s
keep `myvars'
order `myvars'

gen index = _n
reshape i index
reshape j mode _census _survey
reshape xij age female white college homeowner
reshape long

xpose, clear varname
drop if inlist(_varname, "index", "mode")
order _varname
rename v1 Census
rename v2 Survey
list
gen sample = "Memphis"
append using tmp
save tmp, replace

*** illinois ***

use "dataset_illinois", clear
keep if !mi(zipcode)
merge m:1 zipcode using "dataset_census illinois"
keep if _m == 3

* first get median age
preserve
	#delimit;
	local agecats
		18to19
		20to20
		21to21
		22to24
		25to29
		30to34
		35to39
		40to44
		45to49
		50to54
		55to59
		60to61
		62to64
		65to66
		67to69
		70to74
		75to79
		80to84
		85to90
		;
	#delimit cr
	drop population populationfemale populationwhite populationcollege
	collapse (sum) population*
	xpose, clear varname
	gen index = _n
	su v1
	gen percent = v1/`r(sum)'
	gen cumpct = 0
	replace cumpct = percent if index == 1
	replace cumpct = cumpct[_n-1] + percent[_n] if index > 1
	gen agerange = subinstr(_varname, "population_", "", 10)
	drop _varname
	split agerange, parse("to")
	destring agerange1 agerange2, replace
	su index if cumpct > .5
	gen medianage = agerange2 if index == r(min)
	su medianage
	l
restore
gen age_census = `r(mean)'

* now get other stats
egen populationzipcode = mean(population), by(zipcode)
egen populationfemalezipcode = mean(populationfemale), by(zipcode)
egen populationwhitezipcode = mean(populationwhite), by(zipcode)
egen populationcollegezipcode = mean(populationcollege), by(zipcode)
egen housingunitszipcode = mean(housingunits), by(zipcode)
egen housingunitsowneroccupiedzipcode = mean(housingunitsowneroccupied), by(zipcode)

egen age_survey = median(age)
egen female_survey = mean(female)
egen white_survey = mean(white)
egen college_survey = mean(college)
egen homeowner_survey = mean(homeowner)

collapse age_survey age_census female_survey white_survey college_survey homeowner_survey populationzipcode populationfemalezipcode populationwhitezipcode populationcollegezipcode housingunitszipcode housingunitsowneroccupiedzipcode, by(zipcode)
collapse age_survey age_census female_survey white_survey college_survey homeowner_survey (sum) populationzipcode populationfemalezipcode populationwhitezipcode populationcollegezipcode housingunitszipcode housingunitsowneroccupiedzipcode

gen female_census = populationfemalezipcode / populationzipcode
gen white_census = populationwhitezipcode / populationzipcode
gen college_census = populationcollegezipcode / populationzipcode
gen homeowner_census = housingunitsowneroccupiedzipcode / housingunitszipcode 
format pop* %200.0fc

local myvars age_census age_survey female_census female_survey white_c white_s college_c college_s homeowner_c homeowner_s
keep `myvars'
order `myvars'

gen index = _n
reshape i index
reshape j mode _census _survey
reshape xij age female white college homeowner
reshape long

xpose, clear varname
drop if inlist(_varname, "index", "mode")
order _varname
rename v1 Census
rename v2 Survey
list
gen sample = "Illinois"

*** Combine 3 samples ***

append using tmp
erase tmp.dta

replace Census = round(Census*100) if Census<1
replace Survey = round(Survey*100) if Survey<1

order sample
list

reshape i _varname
reshape j sample Memphis Nashville Illinois, string
reshape xij Census Survey
reshape wide

replace _varname = "Median age" if _varname == "age"
replace _varname = "Percent " + _varname if _varname != "Median age"

gen order = .
replace order = 1 if _varname == "Median age"
replace order = 2 if _varname == "Percent female"
replace order = 3 if _varname == "Percent white"
replace order = 4 if _varname == "Percent college"
replace order = 5 if _varname == "Percent homeowner"
sort order
drop order

#delimit;

listtex 
	using "table 1.tex"
	,
	replace
	type rstyle(tabular)
	head(
		"\begin{tabular*}{\hsize}{@{\hskip\tabcolsep\extracolsep\fill}l*{7}{l}}"
		"\toprule&\multicolumn{2}{c}{\underline{Memphis}}&\multicolumn{2}{c}{\underline{Nashville}}&\multicolumn{2}{c}{\underline{Illinois}}\\"
		"&Census&Sample&Census&Sample&Census&Sample""\\
		
		\midrule"
	)
	foot("\bottomrule\end{tabular*}")
	;
	
#delimit cr
